package com.daervin.svc.parser.sub;

import com.daervin.svc.common.constants.Category;
import com.daervin.svc.common.constants.Constants;
import com.daervin.svc.common.dto.NewsDTO;
import com.daervin.svc.common.utils.DateTimeUtils;
import com.daervin.svc.common.utils.NumberUtil;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.selector.Html;

import java.util.Calendar;
import java.util.List;

import static com.daervin.svc.common.constants.SourceEnum.PING_WEST;

/**
 * Sub-page parser for PingWest article pages whose markup uses the
 * {@code wire-title} / {@code wire-content} sections.
 *
 * <p>For a qualifying page it extracts the title, the body text and the relative publish time,
 * builds a {@link NewsDTO} and stores it on the page under
 * {@link Constants#PARSER_RESULT_ITEM}.
 *
 * @author daervin
 * @version 1.0.0
 * @since 1.0.0
 */
public class PingWestSubPageParser extends RootSubParser {
    /** Suffix of a relative publish time expressed in minutes ("N minutes ago"). */
    private static final String MINUTES_AGO = "分钟前";
    /** Suffix of a relative publish time expressed in hours ("N hours ago"). */
    private static final String HOURS_AGO = "小时前";

    public PingWestSubPageParser(String... listUrl) {
        super(listUrl);
    }

    /**
     * Parses one article page into a {@link NewsDTO}.
     *
     * <p>The page is skipped when the title or publish time is missing, when the title matches
     * one of the digest keywords, when the body has no text, when the body still contains an
     * {@code <img} tag after clean-up, or when a relative publish time cannot be parsed.
     *
     * @param page the downloaded page to parse
     * @return {@link MatchOther#YES} when a news item was stored on the page,
     *         {@link MatchOther#NO} when the page was skipped
     */
    @Override
    public MatchOther processPage(Page page) {
        Html html = page.getHtml();
        String title = html.xpath("//section[@class=\"wire-title\"]//p//text()").get();
        String publishTime = html.xpath("//section[@class=\"wire-title\"]//span[@class=\"time\"]//text()").get();
        if (StringUtils.isEmpty(title) || StringUtils.isEmpty(publishTime) || isDigestTitle(title)) {
            return MatchOther.NO;
        }

        List<String> descPList = html.xpath("//article[@class=\"wire-content\"]//p").all();
        // A collection must be tested with isEmpty(); StringUtils.isEmpty(Object) only
        // recognises null and the empty String, so it never reports an empty list.
        if (descPList == null || descPList.isEmpty()) {
            return MatchOther.NO;
        }

        String desc = buildDescription(descPList);
        if (desc.isEmpty() || desc.contains("<img")) {
            return MatchOther.NO;
        }

        Calendar belongDate = resolveBelongDate(publishTime);
        if (belongDate == null) {
            return MatchOther.NO;
        }

        NewsDTO news = new NewsDTO();
        news.setTitle(title);
        news.setDesc(desc);
        news.setBelongDate(DateTimeUtils.longParse(belongDate.getTime()));
        news.setAnnouncer(PING_WEST.announcer);
        news.setLinks(page.getRequest().getUrl());
        news.setCategory(PING_WEST.category);
        // Titles mentioning financing, lead investment or acquisition go to the finance category.
        if (title.contains("融资") || title.contains("领投") || title.contains("收购")) {
            news.setCategory(Category.FINANCE.ordinal());
        }

        page.putField(Constants.PARSER_RESULT_ITEM, news);
        return MatchOther.YES;
    }

    /** Returns whether the title contains one of the digest keywords that are skipped. */
    private static boolean isDigestTitle(String title) {
        return title.contains("PW早报") || title.contains("早8点") || title.contains("早 8 点档");
    }

    /** Concatenates the paragraphs after stripping {@code <p>} wrappers and anchor tags. */
    private static String buildDescription(List<String> paragraphs) {
        StringBuilder desc = new StringBuilder();
        for (String paragraph : paragraphs) {
            String cleaned = paragraph.trim()
                    .replaceAll("(<p>|</p>|</a>)", "")
                    .replaceAll("<a\\b[^>]+\\bhref=\"([^\"]*)\"[^>]*>", "");
            desc.append(cleaned);
        }
        return desc.toString();
    }

    /**
     * Converts the relative publish time shown on the page into a point in time.
     *
     * <p>Starts from the current time and subtracts the stated number of minutes or hours.
     * Text that carries neither suffix leaves the current time unchanged.
     *
     * @param publishTime non-empty publish time text taken from the page
     * @return the resolved time, or {@code null} when the numeric part cannot be parsed
     */
    private static Calendar resolveBelongDate(String publishTime) {
        Calendar date = Calendar.getInstance();
        String text = publishTime;
        if (text.contains(MINUTES_AGO)) {
            // Trim so whitespace around the number does not make the parse fail.
            text = text.replace(MINUTES_AGO, "").trim();
            Integer minute = NumberUtil.safeParseNumber(text, Integer.class);
            if (minute == null) {
                return null;
            }
            date.add(Calendar.MINUTE, -minute);
        }
        if (text.contains(HOURS_AGO)) {
            text = text.replace(HOURS_AGO, "").trim();
            Integer hour = NumberUtil.safeParseNumber(text, Integer.class);
            if (hour == null) {
                return null;
            }
            date.add(Calendar.HOUR_OF_DAY, -hour);
        }
        return date;
    }
}
